In [1]:
import numpy as np
In [2]:
# Seed NumPy's global generator so the simulated data are reproducible.
np.random.seed(1)
# Population: 100000 integers drawn from {0, 1}.
statistical_population = np.random.randint(0, 2, size=100000)
# Sample of 1000 elements drawn from the population, with replacement.
random_sample = np.random.choice(statistical_population, size=1000, replace=True)
In [3]:
# True value of the proportion: the mean of the whole population.
np.mean(statistical_population)
Out[3]:
In [4]:
# Point estimate of the proportion: the mean of the sample.
np.mean(random_sample)
Out[4]:
In [5]:
from statsmodels.stats.proportion import proportion_confint
In [6]:
# Confidence interval for the proportion using the normal approximation
# (at proportion_confint's default significance level).
# ndarray.sum() counts the ones in a single vectorized pass instead of
# iterating the array element by element with the builtin sum().
normal_interval = proportion_confint(random_sample.sum(), len(random_sample), method='normal')
In [7]:
# Report the interval bounds and its width.
# The parenthesized single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call.
print('normal_interval [%f, %f] with width %f' % (normal_interval[0],
                                                  normal_interval[1],
                                                  normal_interval[1] - normal_interval[0]))
In [8]:
# Confidence interval for the proportion using the Wilson method
# (at proportion_confint's default significance level).
# ndarray.sum() counts the ones in a single vectorized pass instead of
# iterating the array element by element with the builtin sum().
wilson_interval = proportion_confint(random_sample.sum(), len(random_sample), method='wilson')
In [9]:
# Report the interval bounds and its width.
# The parenthesized single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call.
print('wilson_interval [%f, %f] with width %f' % (wilson_interval[0],
                                                  wilson_interval[1],
                                                  wilson_interval[1] - wilson_interval[0]))
In [10]:
from statsmodels.stats.proportion import samplesize_confint_proportion
In [11]:
# Sample size needed for a confidence interval with half-length 0.01, using
# the current sample mean as the proportion estimate (the function's default
# significance level applies). Rounded up to a whole number of observations.
n_samples = int(
    np.ceil(
        samplesize_confint_proportion(random_sample.mean(), half_length=0.01)
    )
)
n_samples
Out[11]:
In [12]:
# Reseed, then draw a new sample of n_samples elements (with replacement)
# from the population. This rebinds random_sample.
np.random.seed(1)
random_sample = np.random.choice(statistical_population, size=n_samples, replace=True)
In [13]:
# Normal-approximation confidence interval recomputed on the current
# random_sample (at proportion_confint's default significance level).
# ndarray.sum() counts the ones in a single vectorized pass instead of
# iterating the array element by element with the builtin sum().
normal_interval = proportion_confint(random_sample.sum(), len(random_sample), method='normal')
In [14]:
# Report the interval bounds and its width.
# The parenthesized single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call.
print('normal_interval [%f, %f] with width %f' % (normal_interval[0],
                                                  normal_interval[1],
                                                  normal_interval[1] - normal_interval[0]))